library(tidyverse)
library(httr)

res=GET('https://developer.nps.gov/api/v1/parks?limit=500&api_key=B9nDpbkbrb3kSOjz6kXSxMJ3d6MSpUvt1QqYdeyn')



data = res %>% content("text") %>% jsonlite::fromJSON() %>% as_tibble()

NPS_data=data %>% unnest(data) %>% unnest(activities,names_sep = '_') %>% unnest(entranceFees,names_sep = '_')
visitation_data <- 
  read_csv("data/Query Builder for Public Use Statistics (1979 - Last Calendar Year).csv") %>% 
  janitor::clean_names()
## Rows: 101419 Columns: 35
## ── Column specification ──────────────────────────────────────────────
## Delimiter: ","
## chr (10): ParkName, UnitCode, ParkType, Region, State, ParkNameTotal, UnitCo...
## dbl  (3): Year, Month, YearTotal
## num (22): RecreationVisits, NonRecreationVisits, RecreationHours, NonRecreat...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
visitation_data %>% 
  group_by(park_type, month) %>% 
  summarize(total_visitation = sum(recreation_visits)) %>% 
<<<<<<< HEAD
plot_ly(x = ~as.factor(month), y = ~total_visitation, type = "scatter", mode = "lines", color = ~park_type)
visitation_data %>% 
  group_by(park_type) %>% 
  summarize(parks = n_distinct(full_name)) %>%
  arrange(desc(parks))  %>%  
  knitr::kable()
park_type parks
National Monument 73
National Historic Site 72
National Historical Park 58
National Park 55
National Memorial 32
National Recreation Area 18
Park (Other) 11
National Battlefield 10
National Seashore 10
National Military Park 9
National Preserve 7
National Wild & Scenic River 7
National Battlefield Park 4
National Parkway 4
National River 4
National Lakeshore 3
International Historic Site 1
National Reserve 1

As seen in the graph, national parks have the highest total visitation among all park types. Interestingly, there more national monuments, national historic sites, and national historic parks than national parks, even though all of these have less total visitation than national parks.

Total Visitation by Visitation Type and Season

The visitation_data data set also includes information on visitation split by tent_campers, backcountry, and rv_campers. We were interested in looking at the total visitation split by these three visitation types and by season.

visitation_data %>% 
  mutate(season = case_when(
    month %in% c(12,1,2) ~ "Winter", 
    month %in% c(3,4,5) ~ "Spring", 
    month %in% c(6,7,8) ~ "Summer", 
    TRUE ~ "Fall"
  )) %>% 
  group_by(season) %>% 
  summarize(total_tent = sum(tent_campers), 
            backcountry_visits = sum(backcountry), 
            total_rv = sum(rv_campers)) %>% 
  pivot_longer(
    total_tent:total_rv, 
    values_to = "total_visit", 
    names_to = "type_visit",
    names_prefix = "total_"
  ) %>% 
  plot_ly(x = ~season, y = ~total_visit, color = ~type_visit, type = "bar")

In fall, winter, and spring, the highest visitation is tent_campers, with a peak summer. Interestingly and unsurprisingly, the highest visitation in winter is rv_campers. This is probably due to weather conditions not permitting tent camping, but allows for rv camping. For fall, winter, and spring there is a similar trend where backcountry has the lowest total visitation and tent_campers has the highest visitation. In winter, rv_campers has the highest total visitation, then tent_campers, and then backcountry.

Average Park Visitation by Season and Park Type

Next, we were interested in seeing the trends in park visitation by season and park type.

park_types <- unique(visitation_data$park_type)
plots <- list()
for (i in seq_along(park_types)) {
plots[[i]] <- visitation_data %>% 
    filter(park_type == park_types[i]) %>% 
    mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>%
    summarize(avg_visits = mean(recreation_visits, na.rm = TRUE)) %>%
    ggplot(aes(x = season, y = avg_visits, fill = season)) +
    geom_col() +
    ggtitle(paste( park_types[i])) 
} 

Most of the parks have a similar trend in average visitation, where summer is the highest and winter is the lowest.

Average Visitation by Visit and Park Type

park_types <- unique(visitation_data$park_type)
plots <- list()

for (i in seq_along(park_types)) {
  visit_summary <- visitation_data %>% 
    drop_na() %>% 
    filter(park_type == park_types[i]) %>% 
    mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    ) 
  plots[[i]] <- ggplot(visit_summary, aes(x = season, y = mean)) + 
    geom_col() + 
    facet_grid(~type_visit) +
    ggtitle(paste("Average Visits for", park_types[i])) + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
} 

No Visitation

plots[[4]] + plots[[8]] + plots[[17]] + plots[[18]]

National Historic Sites, National Memorials, National Battlefield Parks, International Historic Sites all had no faceted average visits. These four park types do not have any rv_campers, backcountry or tent_campers.

Only Backcountry Visitation

plots[[16]] + plots[[15]] + plots[[6]]

National Reserve only had backcountry visitation in spring. National Military Park had backcountry visitation in all seasons, with mean visitation being highest in spring. National Battlefield had backcountry visitation in all seasons, with mean visitation being lowest in summer and highest in spring.

No RV Visitation

plots[13]
## [[1]]

National Wild and Scienic River has backcountry and tent visitation, no rv visitation. Interestingly though, the mean tent visitation is low for all seasons, with a slight peak in summer.

Park (Other) Visitation

plots[14]
## [[1]]

This plot was interesting because the park type Park (Other) has high tent mean visitation, rv visitation, but low backcountry visitation. It is the highest in spring and summer.

map of all national parks in US

regional_data <- NPS_data %>% 
  mutate(region = case_when(
    states %in% c("CT", "RI", "NH", "VT", "NJ", "NY", "PA", "MD", "ME", "MA") ~ "northeast", 
    states %in% c("IL","IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO", "NE", "ND", "SD") ~ "midwest", 
    states %in% c("FL", "GA", "NC", "SC", "VA", "DE", "WV", "AL", "KY", "MS", "TN", "AR", "LA", "OK", "TX", "DC") ~ "south", 
    states %in% c("AK", "CA", "HI", "OR", "WA", "AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY") ~ "west",
    states %in% c("VI", "AS", "GU", "PR") ~ "u.s. territory",
    TRUE ~ "no state data"
  ))

g <- list(
  scope = 'usa',
  projection = list(type = 'albers usa'),
  showland = TRUE,
  landcolor = toRGB("gray95"),
  subunitcolor = toRGB("gray85"),
  countrycolor = toRGB("gray85"),
  countrywidth = 0.5,
  subunitwidth = 0.5
)

fig <- plot_geo(regional_data, lat = ~latitude, lon = ~longitude)
fig <- fig %>% add_markers(
    text = ~paste(full_name, states, sep = "<br />"),
    color = ~region, symbol = I("circle"), size = I(8), hoverinfo = "text"
  )

fig <- fig %>% layout(
    title = 'US National Parks', geo = g
  )

fig

Regional Comparisons

region_data_vists <- visitation_data %>% 
  mutate(region = case_when(
    state %in% c("CT", "RI", "NH", "VT", "NJ", "NY", "PA", "MD", "ME", "MA") ~ "northeast", 
    state %in% c("IL","IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO", "NE", "ND", "SD") ~ "midwest", 
    state %in% c("FL", "GA", "NC", "SC", "VA", "DE", "WV", "AL", "KY", "MS", "TN", "AR", "LA", "OK", "TX", "DC") ~ "south", 
    state %in% c("AK", "CA", "HI", "OR", "WA", "AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY") ~ "west",
    state %in% c("VI", "AS", "GU", "PR") ~ "u.s. territory",
    TRUE ~ "no state data"
  ))
region_data <- combined_data %>% 
  mutate(region = case_when(
    state %in% c("CT", "RI", "NH", "VT", "NJ", "NY", "PA", "MD", "ME", "MA") ~ "northeast", 
    state %in% c("IL","IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO", "NE", "ND", "SD") ~ "midwest", 
    state %in% c("FL", "GA", "NC", "SC", "VA", "DE", "WV", "AL", "KY", "MS", "TN", "AR", "LA", "OK", "TX", "DC") ~ "south", 
    state %in% c("AK", "CA", "HI", "OR", "WA", "AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY") ~ "west",
    state %in% c("VI", "AS", "GU", "PR") ~ "u.s. territory",
    state == TRUE ~ "no state data"
  )) %>% select(c(-full_name.y, -topics_id, -topics_name, -activities_id))

Northeast

region_data %>%
  filter(region == "northeast") %>% 
  distinct(activities_name) %>% 
  head() %>% knitr::kable()
activities_name
Arts and Culture
Cultural Demonstrations
Astronomy
Stargazing
Biking
Boating
northeast_plot <- region_data %>%
  filter(region == "northeast") %>% 
  mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    ) %>% ggplot(aes(x = season, y = mean)) + 
  geom_col() +  
  facet_grid(~type_visit) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + 
  labs(title = "Visitation in Northeast by Season", 
       x = "Season", 
       y = "Average Visitation")
northeast_plot

Midwest

region_data %>%
  filter(region == "midwest") %>% distinct(activities_name) %>% 
  head() %>%  knitr::kable()
activities_name
Arts and Culture
Cultural Demonstrations
Astronomy
Stargazing
Food
Picnicking
midwest_plot <- 
  region_data %>%
  filter(region == "midwest") %>% 
  mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    ) %>% ggplot(aes(x = season, y = mean)) + 
  geom_col() +  
  facet_grid(~type_visit) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + 
  labs(title = "Visitation in Midwest by Season", 
       x = "Season", 
       y = "Average Visitation")
midwest_plot

South

region_data %>%
  filter(region == "south") %>%
  distinct(activities_name) %>% 
  head() %>%  
  knitr::kable()
activities_name
Astronomy
Stargazing
Food
Picnicking
Guided Tours
Self-Guided Tours - Walking
south_plot <- 
  region_data %>%
  filter(region == "south") %>% 
  mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    ) %>% ggplot(aes(x = season, y = mean)) + 
  geom_col() +  
  facet_grid(~type_visit) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + 
  labs(title = "Visitation South by Season", 
       x = "Season", 
       y = "Average Visitation")
south_plot

West

region_data %>% filter(region == "west") %>% distinct(activities_name) %>% 
  head() %>% knitr::kable()
activities_name
Arts and Culture
Astronomy
Stargazing
Biking
Camping
Backcountry Camping
west_plot <- 
  region_data %>% 
  filter(region == "west") %>% 
  mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    ) %>% ggplot(aes(x = season, y = mean)) + 
  geom_col() +  
  facet_grid(~type_visit) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + 
  labs(title = "Visitation in West by Season", 
       x = "Season", 
       y = "Average Visitation") 

northeast_plot + midwest_plot + south_plot + west_plot

Across all regions, it seems that summer has the highest visitation across regions, with the exception of backcountry hiking, which has a peak in spring.

For the Northeastern U.S. tent camping is by far the most popular visitation type. Interestingly, in this region of the United Sattes, there was hardly any visitation in winter in all three visitation types. This might be due to the fact that the Northeastern U.S. typically has colder temperatures and possibly snow during this time.

For the Midwestern U.S., tent camping is the most popular visitation type, especially in the summer, but it is closely followed by backcountry hiking in summer. RV camping does seem to be higher in the midwest than in the northeastern region, especially during the summer. As before, there is hardly any visitation in the winter, and actually none for rv camping. This again might be due to weather conditions not permitting outdoor activities such as these. For the Southern U.S., tent camping has the highest average visitation in the summer time, closely followed by rv camping. In the south, rv camping is much more popular than in other regions around the U.S. Backcountry is lower, but does have an almost even spread among the seasons. This could be due to the more temperate conditions in the winter and other times of the year, allowing individuals to enjoy this activity year round.

For the Western U.S., ten camping once again has the highest average visitation across all seasons. Interestingly, backcountry hiking and rv camping are equal in the summer time in this region. This region has some visitation in the winter, but less than the southern region.

Comparing Regions

region_data %>% group_by(region) %>% 
  summarize(avg_visitation = mean(recreation_visits, na.rm = TRUE)) %>% 
  arrange(desc(avg_visitation)) %>% knitr::kable()
region avg_visitation
west 149106.067
south 95613.721
northeast 83202.371
midwest 44547.512
u.s. territory 31867.809
NA 4264.793
region_long <- region_data %>%
  pivot_longer(cols = c(recreation_visits, tent_campers, rv_campers, backcountry), 
               names_to = "visit_type", 
               values_to = "count") 

The western region of the U.S. seems to have the highest average visitation of all regions, followed by the southern region.

Looking at the amount of parks in each region will be helpful to determine if this is due to the fact that there are more parks in this region, or if there is something else going on.

region_data %>% 
  group_by(region) %>% 
  summarize(parks = n_distinct(park_code)) %>% knitr::kable()
region parks
midwest 46
northeast 68
south 117
u.s. territory 7
west 116
NA 122

From this table, we can see that actually the southern region has the most amount of parks in this dataset, followed by the western region. So this trend is visitation is not just due to the amount of parks in a specific region.

region_data %>%
  group_by(region) %>%
  summarize(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = starts_with("mean_"),
    names_to = "type_visit",
    values_to = "mean",
    names_prefix = "mean_"
  ) %>% ggplot(aes(x = region, y = mean)) + geom_col() + facet_grid(~type_visit) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Now looking at the specific visits by region, we can see once again that west has the highest mean visitation among all the regions, with its highest being tent camping. Unsurprisingly, we see that the south has the second highest mean visitation. Interestingly, in backcountry hiking, the midwest has the second highest mean visitation, despite the fact that it has one of the lowest amount of parks in the dataset.

======= ggplot(aes(x = as.factor(month), y = total_visitation, color = park_type, group = park_type)) + geom_line(linewidth = 0.7)
## `summarise()` has grouped output by 'park_type'. You can override
## using the `.groups` argument.

visitation_data %>% 
  group_by(park_type, year) %>% 
  summarize(total_visitation = sum(recreation_visits)) %>% 
  ggplot(aes(x = as.factor(year), y = total_visitation, color = park_type, group = park_type)) + 
  geom_line(linewidth = 0.7) +  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
## `summarise()` has grouped output by 'park_type'. You can override
## using the `.groups` argument.

>>>>>>> d7d1b02e51298d32c1ca0e195761937ed7ecbc12